{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "81351281",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: stanza in c:\\users\\ataus\\anaconda3\\lib\\site-packages (1.9.2)\n",
      "Requirement already satisfied: emoji in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (2.14.0)\n",
      "Requirement already satisfied: numpy in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (1.26.4)\n",
      "Requirement already satisfied: protobuf>=3.15.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (3.20.3)\n",
      "Requirement already satisfied: requests in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (2.32.3)\n",
      "Requirement already satisfied: networkx in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (3.1)\n",
      "Requirement already satisfied: torch>=1.3.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (2.5.1)\n",
      "Requirement already satisfied: tqdm in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stanza) (4.66.6)\n",
      "Requirement already satisfied: filelock in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from torch>=1.3.0->stanza) (3.13.1)\n",
      "Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from torch>=1.3.0->stanza) (4.9.0)\n",
      "Requirement already satisfied: jinja2 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from torch>=1.3.0->stanza) (3.1.3)\n",
      "Requirement already satisfied: fsspec in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from torch>=1.3.0->stanza) (2023.10.0)\n",
      "Requirement already satisfied: sympy==1.13.1 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from torch>=1.3.0->stanza) (1.13.1)\n",
      "Requirement already satisfied: mpmath<1.4,>=1.1.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from sympy==1.13.1->torch>=1.3.0->stanza) (1.3.0)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from requests->stanza) (2.0.4)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from requests->stanza) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from requests->stanza) (2.0.7)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from requests->stanza) (2024.6.2)\n",
      "Requirement already satisfied: colorama in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from tqdm->stanza) (0.4.6)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from jinja2->torch>=1.3.0->stanza) (2.1.3)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d83a7f48076343ffb952094211671799",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-26 15:02:20 INFO: Downloaded file to C:\\Users\\ataus\\stanza_resources\\resources.json\n",
      "2025-04-26 15:02:20 INFO: Downloading default packages for language: en (English) ...\n",
      "2025-04-26 15:02:21 INFO: File exists: C:\\Users\\ataus\\stanza_resources\\en\\default.zip\n",
      "2025-04-26 15:02:25 INFO: Finished downloading models and saved to C:\\Users\\ataus\\stanza_resources\n",
      "2025-04-26 15:02:25 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8f5c86fb40d74997bdbb07ec8e913517",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2025-04-26 15:02:26 INFO: Downloaded file to C:\\Users\\ataus\\stanza_resources\\resources.json\n",
      "2025-04-26 15:02:26 WARNING: Language en package default expects mwt, which has been added\n",
      "2025-04-26 15:02:26 INFO: Loading these models for language: en (English):\n",
      "==============================\n",
      "| Processor | Package        |\n",
      "------------------------------\n",
      "| tokenize  | combined       |\n",
      "| mwt       | combined       |\n",
      "| sentiment | sstplus_charlm |\n",
      "==============================\n",
      "\n",
      "2025-04-26 15:02:26 INFO: Using device: cpu\n",
      "2025-04-26 15:02:26 INFO: Loading: tokenize\n",
      "C:\\Users\\ataus\\anaconda3\\Lib\\site-packages\\stanza\\models\\tokenization\\trainer.py:82: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  checkpoint = torch.load(filename, lambda storage, loc: storage)\n",
      "2025-04-26 15:02:28 INFO: Loading: mwt\n",
      "C:\\Users\\ataus\\anaconda3\\Lib\\site-packages\\stanza\\models\\mwt\\trainer.py:201: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  checkpoint = torch.load(filename, lambda storage, loc: storage)\n",
      "2025-04-26 15:02:28 INFO: Loading: sentiment\n",
      "C:\\Users\\ataus\\anaconda3\\Lib\\site-packages\\stanza\\models\\classifiers\\trainer.py:72: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  checkpoint = torch.load(filename, lambda storage, loc: storage)\n",
      "C:\\Users\\ataus\\anaconda3\\Lib\\site-packages\\stanza\\models\\common\\char_model.py:271: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  state = torch.load(filename, lambda storage, loc: storage)\n",
      "C:\\Users\\ataus\\anaconda3\\Lib\\site-packages\\stanza\\models\\common\\pretrain.py:56: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  data = torch.load(self.filename, lambda storage, loc: storage)\n",
      "2025-04-26 15:02:29 INFO: Done loading processors!\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: ipykernel in c:\\users\\ataus\\anaconda3\\lib\\site-packages (6.28.0)\n",
      "Requirement already satisfied: comm>=0.1.1 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (0.2.2)\n",
      "Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (1.6.7)\n",
      "Requirement already satisfied: ipython>=7.23.1 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (8.20.0)\n",
      "Requirement already satisfied: jupyter-client>=6.1.12 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (8.6.0)\n",
      "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (5.5.0)\n",
      "Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (0.1.6)\n",
      "Requirement already satisfied: nest-asyncio in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (1.6.0)\n",
      "Requirement already satisfied: packaging in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (23.1)\n",
      "Requirement already satisfied: psutil in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (5.9.0)\n",
      "Requirement already satisfied: pyzmq>=24 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (25.1.2)\n",
      "Requirement already satisfied: tornado>=6.1 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (6.3.3)\n",
      "Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipykernel) (5.7.1)\n",
      "Requirement already satisfied: decorator in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (5.1.1)\n",
      "Requirement already satisfied: jedi>=0.16 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.18.1)\n",
      "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.43)\n",
      "Requirement already satisfied: pygments>=2.4.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (2.15.1)\n",
      "Requirement already satisfied: stack-data in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.2.0)\n",
      "Requirement already satisfied: colorama in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from jupyter-client>=6.1.12->ipykernel) (2.8.2)\n",
      "Requirement already satisfied: platformdirs>=2.5 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (3.10.0)\n",
      "Requirement already satisfied: pywin32>=300 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (305.1)\n",
      "Requirement already satisfied: parso<0.9.0,>=0.8.0 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel) (0.8.3)\n",
      "Requirement already satisfied: wcwidth in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.5)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel) (1.16.0)\n",
      "Requirement already satisfied: executing in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel) (0.8.3)\n",
      "Requirement already satisfied: asttokens in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel) (2.0.5)\n",
      "Requirement already satisfied: pure-eval in c:\\users\\ataus\\anaconda3\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel) (0.2.2)\n"
     ]
    }
   ],
   "source": [
    "!pip install stanza\n",
    "import stanza\n",
    "stanza.download('en')\n",
    "nlp = stanza.Pipeline(lang='en', processors='tokenize,sentiment')\n",
    "from statistics import mean\n",
    "import pandas as pd\n",
    "#pip install string\n",
    "import re\n",
    "import string\n",
    "#conda install -c conda-forge textblob\n",
    "#conda update -n base -c conda-forge conda\n",
    "from textblob import TextBlob\n",
    "import numpy as np\n",
    "#pip install vaderSentiment\n",
    "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n",
    "vader = SentimentIntensityAnalyzer()\n",
    "!pip install ipykernel\n",
    "#python -m ipykernel install --user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8252ed2",
   "metadata": {},
   "outputs": [],
   "source": [
    "print ('Citation: Peng Qi, Yuhao Zhang, Yuhui Zhang, Jason Bolton and Christopher D. Manning. 2020. Stanza: A Python Natural Language Processing Toolkit for Many Human Languages. In Association for Computational Linguistics (ACL) System Demonstrations. 2020. [pdf][bib]')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8dd1afa6-0741-4775-a673-cfd4ce465478",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3fcf6a3-0d4c-491a-ab56-4a44724ad069",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 2c"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f45ad76",
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.ExcelFile(r'C:\\Users\\df1.xlsx').parse('Sheet1')\n",
    "print(df1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f94713bc-c2a1-4d6a-8579-4828de8448c6",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(df1.columns)\n",
    "print(df1.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "778f66d7-8c72-4135-bcbf-c3cf75f7caa5",
   "metadata": {},
   "outputs": [],
   "source": [
    "df1['open1'].str.replace('[{}]'.format(string.punctuation), '')\n",
    "df1['edited1'] = df1['open1'].str.replace('\\d+', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b6c22a5",
   "metadata": {},
   "outputs": [],
   "source": [
    "edited1=[]\n",
    "edited1.append(df1['edited1'])\n",
    "\n",
    "def sentiment_stanza1(edited1):\n",
    "    doc = nlp(str(edited1))\n",
    "    return mean([x.sentiment-1 for x in doc.sentences])\n",
    "\n",
    "df1['stanza_edited'] = df1.open1.apply(lambda sentence: sentiment_stanza1(sentence))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee700722",
   "metadata": {},
   "outputs": [],
   "source": [
    "textblob_edited1=[]\n",
    "textblob_edited1.append(df1['edited1'])\n",
    "\n",
    "def sentiment_textblob(edited1):\n",
    "    testimonial = TextBlob(str(edited1))\n",
    "    return testimonial.sentiment.polarity\n",
    "\n",
    "df1['textblob_edited'] = df1.open1.apply(lambda sentence: sentiment_textblob(sentence))\n",
    "df1['textblob_discrete_edited'] = df1.textblob_edited.apply(lambda x: -1 if x<0 else 1 if x>0 else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a57f8c1d-7c46-46f1-bfd2-859fe2de20e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def sentiment_vader1(edited1):\n",
    "    vader_sentiment = vader.polarity_scores(str(edited1))\n",
    "    return vader_sentiment['compound']\n",
    "\n",
    "df1['vader_edited'] = df1['edited1'].apply(lambda sentence: sentiment_vader1(sentence))\n",
    "\n",
    "print(df1[['edited1', 'vader_edited']].head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09a02277",
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.to_excel(r'C:\\Users\\df1nlp.xlsx', sheet_name='Sheet1', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "252d0278-2b49-4eb7-b607-827cb2ef596e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99184bdd-6bc1-49af-bf7f-4aa739a22e97",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Figure 4d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f333d004-48d8-4c74-837b-5cb15dd8fb18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>open3</th>\n",
       "      <th>exp2_groups</th>\n",
       "      <th>exp2_results</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>coupled   belief   address  correct may  enough</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>placed  party   home</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>need   likely      reasonable doubt threshold</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>eyewitnesses   past  misidentified potential s...</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>eye witness accounts  unreliable</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>984</th>\n",
       "      <td>sufficient enough   proved  preponderance   ...</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>985</th>\n",
       "      <td>guaranteed</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>986</th>\n",
       "      <td>witness   relatively sure  recognized  susp...</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>987</th>\n",
       "      <td>id need  evidence   percent certainty</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>988</th>\n",
       "      <td>percent   information is not  person   sai...</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>989 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 open3  exp2_groups  \\\n",
       "0      coupled   belief   address  correct may  enough            3   \n",
       "1                                 placed  party   home            3   \n",
       "2        need   likely      reasonable doubt threshold            3   \n",
       "3    eyewitnesses   past  misidentified potential s...            2   \n",
       "4                     eye witness accounts  unreliable            2   \n",
       "..                                                 ...          ...   \n",
       "984    sufficient enough   proved  preponderance   ...            2   \n",
       "985                                       guaranteed              3   \n",
       "986     witness   relatively sure  recognized  susp...            1   \n",
       "987              id need  evidence   percent certainty            1   \n",
       "988      percent   information is not  person   sai...            1   \n",
       "\n",
       "     exp2_results  \n",
       "0               4  \n",
       "1               5  \n",
       "2               4  \n",
       "3               1  \n",
       "4               3  \n",
       "..            ...  \n",
       "984             5  \n",
       "985             4  \n",
       "986             5  \n",
       "987             3  \n",
       "988             2  \n",
       "\n",
       "[989 rows x 3 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = pd.ExcelFile(r'C:\\Users\\ataus\\Dropbox\\CRASA\\Judicial Survey\\Open-Ended Items Analysis\\df3.xlsx').parse('Sheet1')\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ec06f152-8b5d-4818-81b3-8c415778e7f0",
   "metadata": {},
   "outputs": [],
   "source": [
    "df3['open3'].str.replace('[{}]'.format(string.punctuation), '')\n",
    "df3['edited3'] = df3['open3'].str.replace('\\d+', '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c1a8bff1-ed14-4f18-81ee-bfa6ad7757a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0      0.0\n",
       "1      0.0\n",
       "2      0.0\n",
       "3     -1.0\n",
       "4     -1.0\n",
       "      ... \n",
       "984    0.0\n",
       "985    0.0\n",
       "986    0.0\n",
       "987    0.0\n",
       "988   -1.0\n",
       "Name: stanza_edited3, Length: 989, dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "edited3=[]\n",
    "edited3.append(df3['edited3'])\n",
    "\n",
    "def sentiment_stanza3(text):\n",
    "    doc = nlp(str(text))\n",
    "    sentiments = [x.sentiment - 1 for x in doc.sentences]\n",
    "    if sentiments:\n",
    "        return mean(sentiments)\n",
    "    else:\n",
    "        return None  # or 0, or any fallback you want\n",
    "\n",
    "df3['stanza_edited3'] = df3.edited3.apply(lambda sentence: sentiment_stanza3(sentence))\n",
    "df3['stanza_edited3'] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "0e225969-ade9-4c5c-a845-e7b8c2419c33",
   "metadata": {},
   "outputs": [],
   "source": [
    "df3['stanza_discrete_edited3'] = df3.stanza_edited3.apply(lambda x: -1 if x<0 else 1 if x>0 else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f12d7eb2-8180-4ac3-a2fc-7463ea99672f",
   "metadata": {},
   "outputs": [],
   "source": [
    "textblob_edited3=[]\n",
    "textblob_edited3.append(df3['edited3'])\n",
    "\n",
    "def sentiment_textblob(edited3):\n",
    "    testimonial = TextBlob(str(edited3))\n",
    "    return testimonial.sentiment.polarity\n",
    "\n",
    "df3['textblob_edited3'] = df3.edited3.apply(lambda sentence: sentiment_textblob(sentence))\n",
    "df3['textblob_discrete_edited5'] = df3.textblob_edited3.apply(lambda x: -1 if x<0 else 1 if x>0 else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "fcf510de-8469-4bdf-be61-692d975f6d27",
   "metadata": {},
   "outputs": [],
   "source": [
    "vader_edited3=[]\n",
    "vader_edited3.append(df3['edited3'])\n",
    "\n",
    "def sentiment_vader1(edited3):\n",
    "    vader_sentiment = vader.polarity_scores(str(edited3))\n",
    "    return vader_sentiment['compound']\n",
    "\n",
    "vader_edited3 = ''.join(map(str, edited3))\n",
    "\n",
    "df3['vader_edited3'] = df3.edited3.apply(lambda sentence: sentiment_vader1(sentence))\n",
    "df3['vader_discrete_edited3'] = df3.vader_edited3.apply(lambda x: -1 if x<0 else 1 if x>0 else 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a6ebe2d7-fd84-4c0f-bcfa-38cab5cfc95a",
   "metadata": {},
   "outputs": [
    {
     "ename": "PermissionError",
     "evalue": "[Errno 13] Permission denied: 'C:\\\\Users\\\\df3nlp.xlsx'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mPermissionError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[11], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m df3\u001b[38;5;241m.\u001b[39mto_excel(\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mUsers\u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mdf3nlp.xlsx\u001b[39m\u001b[38;5;124m'\u001b[39m, sheet_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mSheet1\u001b[39m\u001b[38;5;124m'\u001b[39m, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\core\\generic.py:2345\u001b[0m, in \u001b[0;36mNDFrame.to_excel\u001b[1;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, inf_rep, freeze_panes, storage_options, engine_kwargs)\u001b[0m\n\u001b[0;32m   2332\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mio\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mformats\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mexcel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ExcelFormatter\n\u001b[0;32m   2334\u001b[0m formatter \u001b[38;5;241m=\u001b[39m ExcelFormatter(\n\u001b[0;32m   2335\u001b[0m     df,\n\u001b[0;32m   2336\u001b[0m     na_rep\u001b[38;5;241m=\u001b[39mna_rep,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2343\u001b[0m     inf_rep\u001b[38;5;241m=\u001b[39minf_rep,\n\u001b[0;32m   2344\u001b[0m )\n\u001b[1;32m-> 2345\u001b[0m formatter\u001b[38;5;241m.\u001b[39mwrite(\n\u001b[0;32m   2346\u001b[0m     excel_writer,\n\u001b[0;32m   2347\u001b[0m     sheet_name\u001b[38;5;241m=\u001b[39msheet_name,\n\u001b[0;32m   2348\u001b[0m     startrow\u001b[38;5;241m=\u001b[39mstartrow,\n\u001b[0;32m   2349\u001b[0m     startcol\u001b[38;5;241m=\u001b[39mstartcol,\n\u001b[0;32m   2350\u001b[0m     freeze_panes\u001b[38;5;241m=\u001b[39mfreeze_panes,\n\u001b[0;32m   2351\u001b[0m     engine\u001b[38;5;241m=\u001b[39mengine,\n\u001b[0;32m   2352\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m   2353\u001b[0m     engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m   2354\u001b[0m )\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\formats\\excel.py:946\u001b[0m, in \u001b[0;36mExcelFormatter.write\u001b[1;34m(self, writer, sheet_name, startrow, startcol, freeze_panes, engine, storage_options, engine_kwargs)\u001b[0m\n\u001b[0;32m    942\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m    943\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    944\u001b[0m     \u001b[38;5;66;03m# error: Cannot instantiate abstract class 'ExcelWriter' with abstract\u001b[39;00m\n\u001b[0;32m    945\u001b[0m     \u001b[38;5;66;03m# attributes 'engine', 'save', 'supported_extensions' and 'write_cells'\u001b[39;00m\n\u001b[1;32m--> 946\u001b[0m     writer \u001b[38;5;241m=\u001b[39m ExcelWriter(  \u001b[38;5;66;03m# type: ignore[abstract]\u001b[39;00m\n\u001b[0;32m    947\u001b[0m         writer,\n\u001b[0;32m    948\u001b[0m         engine\u001b[38;5;241m=\u001b[39mengine,\n\u001b[0;32m    949\u001b[0m         storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m    950\u001b[0m         engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m    951\u001b[0m     )\n\u001b[0;32m    952\u001b[0m     need_save \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m    954\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\excel\\_openpyxl.py:61\u001b[0m, in \u001b[0;36mOpenpyxlWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs, **kwargs)\u001b[0m\n\u001b[0;32m     57\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mopenpyxl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mworkbook\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Workbook\n\u001b[0;32m     59\u001b[0m engine_kwargs \u001b[38;5;241m=\u001b[39m combine_kwargs(engine_kwargs, kwargs)\n\u001b[1;32m---> 61\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(\n\u001b[0;32m     62\u001b[0m     path,\n\u001b[0;32m     63\u001b[0m     mode\u001b[38;5;241m=\u001b[39mmode,\n\u001b[0;32m     64\u001b[0m     storage_options\u001b[38;5;241m=\u001b[39mstorage_options,\n\u001b[0;32m     65\u001b[0m     if_sheet_exists\u001b[38;5;241m=\u001b[39mif_sheet_exists,\n\u001b[0;32m     66\u001b[0m     engine_kwargs\u001b[38;5;241m=\u001b[39mengine_kwargs,\n\u001b[0;32m     67\u001b[0m )\n\u001b[0;32m     69\u001b[0m \u001b[38;5;66;03m# ExcelWriter replaced \"a\" by \"r+\" to allow us to first read the excel file from\u001b[39;00m\n\u001b[0;32m     70\u001b[0m \u001b[38;5;66;03m# the file and later write to it\u001b[39;00m\n\u001b[0;32m     71\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_mode:  \u001b[38;5;66;03m# Load from existing workbook\u001b[39;00m\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\excel\\_base.py:1263\u001b[0m, in \u001b[0;36mExcelWriter.__init__\u001b[1;34m(self, path, engine, date_format, datetime_format, mode, storage_options, if_sheet_exists, engine_kwargs)\u001b[0m\n\u001b[0;32m   1259\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handles \u001b[38;5;241m=\u001b[39m IOHandles(\n\u001b[0;32m   1260\u001b[0m     cast(IO[\u001b[38;5;28mbytes\u001b[39m], path), compression\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcompression\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mNone\u001b[39;00m}\n\u001b[0;32m   1261\u001b[0m )\n\u001b[0;32m   1262\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(path, ExcelWriter):\n\u001b[1;32m-> 1263\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handles \u001b[38;5;241m=\u001b[39m get_handle(\n\u001b[0;32m   1264\u001b[0m         path, mode, storage_options\u001b[38;5;241m=\u001b[39mstorage_options, is_text\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m   1265\u001b[0m     )\n\u001b[0;32m   1266\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cur_sheet \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m date_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
      "File \u001b[1;32m~\\anaconda3\\Lib\\site-packages\\pandas\\io\\common.py:872\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m    863\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(\n\u001b[0;32m    864\u001b[0m             handle,\n\u001b[0;32m    865\u001b[0m             ioargs\u001b[38;5;241m.\u001b[39mmode,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    868\u001b[0m             newline\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    869\u001b[0m         )\n\u001b[0;32m    870\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    871\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[1;32m--> 872\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n\u001b[0;32m    873\u001b[0m     handles\u001b[38;5;241m.\u001b[39mappend(handle)\n\u001b[0;32m    875\u001b[0m \u001b[38;5;66;03m# Convert BytesIO or file objects passed with an encoding\u001b[39;00m\n",
      "\u001b[1;31mPermissionError\u001b[0m: [Errno 13] Permission denied: 'C:\\\\Users\\\\df3nlp.xlsx'"
     ]
    }
   ],
   "source": [
    "df3.to_excel(r'C:\\Users\\df3nlp.xlsx', sheet_name='Sheet1', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b7745a87-7b3b-4890-8543-f1941670c17f",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6f5579f-f199-4e6f-9b46-3c8625ed6dc2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure 7c: sentiment scoring of the 'open5' responses in df5 (cells below)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41929515",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the 'Sheet1' worksheet of the df5 workbook into a DataFrame.\n",
    "# pd.read_excel is used instead of pd.ExcelFile(...).parse(...) so the workbook\n",
    "# handle is closed as soon as the sheet has been read, rather than whenever the\n",
    "# temporary ExcelFile object happens to be garbage-collected.\n",
    "df5 = pd.read_excel(r'C:\\Users\\df5.xlsx', sheet_name='Sheet1')\n",
    "df5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8009e80d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the cleaned text column used by the sentiment cells: 'edited5' is\n",
    "# 'open5' with every run of digits removed.\n",
    "# regex=True is required: Series.str.replace treats the pattern as a literal\n",
    "# string by default in pandas >= 2.0, which made the original '\\d+' call a\n",
    "# silent no-op. The raw string also avoids the invalid '\\d' escape warning.\n",
    "# NOTE(review): the original cell also computed a punctuation-stripped copy of\n",
    "# 'open5' but discarded the result, so punctuation has never been removed from\n",
    "# 'edited5'. That is left unchanged here -- confirm whether stripping was intended.\n",
    "df5['edited5'] = df5['open5'].str.replace(r'\\d+', '', regex=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f47aee87",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single-element list holding the cleaned-text Series; kept as a notebook-level\n",
    "# name because other cells may refer to `edited5`.\n",
    "edited5 = [df5['edited5']]\n",
    "\n",
    "\n",
    "def sentiment_stanza5(edited5):\n",
    "    \"\"\"Return the mean of (sentence.sentiment - 1) over the sentences `nlp` finds.\n",
    "\n",
    "    The argument is converted with str() before being passed to `nlp`, which is\n",
    "    defined outside this cell (presumably a Stanza pipeline -- confirm).\n",
    "    \"\"\"\n",
    "    parsed = nlp(str(edited5))\n",
    "    shifted_scores = [sent.sentiment - 1 for sent in parsed.sentences]\n",
    "    return mean(shifted_scores)\n",
    "\n",
    "\n",
    "# Score every row of the cleaned column, then display the resulting Series.\n",
    "df5['stanza_edited5'] = df5['edited5'].apply(sentiment_stanza5)\n",
    "df5['stanza_edited5']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9281a7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reduce each continuous Stanza score to its sign: 1 if positive, -1 if negative,\n",
    "# otherwise 0.\n",
    "df5['stanza_discrete_edited5'] = df5['stanza_edited5'].apply(\n",
    "    lambda score: 1 if score > 0 else (-1 if score < 0 else 0)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "515238b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single-element list wrapping the cleaned-text Series (not read again in this cell).\n",
    "textblob_edited5 = [df5['edited5']]\n",
    "\n",
    "\n",
    "def sentiment_textblob(edited5):\n",
    "    \"\"\"Return the TextBlob sentiment polarity of str(edited5).\"\"\"\n",
    "    return TextBlob(str(edited5)).sentiment.polarity\n",
    "\n",
    "\n",
    "# Continuous polarity per row, then its sign (-1, 0 or 1) as a discrete label.\n",
    "df5['textblob_edited5'] = df5['edited5'].apply(sentiment_textblob)\n",
    "df5['textblob_discrete_edited5'] = df5['textblob_edited5'].apply(\n",
    "    lambda polarity: 1 if polarity > 0 else (-1 if polarity < 0 else 0)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "871873b7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def sentiment_vader1(edited5):\n",
    "    \"\"\"Return the 'compound' entry of vader.polarity_scores(str(edited5)).\n",
    "\n",
    "    `vader` is defined outside this cell (presumably a VADER\n",
    "    SentimentIntensityAnalyzer -- confirm).\n",
    "    \"\"\"\n",
    "    return vader.polarity_scores(str(edited5))['compound']\n",
    "\n",
    "\n",
    "# The original cell also built `vader_edited5` (first a list, then a string joined\n",
    "# from the `edited5` global created in another cell). Nothing read it, and it made\n",
    "# this cell fail with NameError unless that other cell had run first, so it is dropped.\n",
    "df5['vader_edited5'] = df5['edited5'].apply(sentiment_vader1)\n",
    "# Sign of the compound score (-1, 0 or 1) as a discrete label.\n",
    "df5['vader_discrete_edited5'] = df5['vader_edited5'].apply(\n",
    "    lambda compound: 1 if compound > 0 else (-1 if compound < 0 else 0)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "896badd7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write df5 to 'Sheet1' of the output workbook, without the index column.\n",
    "# NOTE(review): the recorded output of the df3 export in this notebook ends in\n",
    "# PermissionError for a path in the same folder -- confirm this location is\n",
    "# writable and the target file is not open elsewhere.\n",
    "df5.to_excel(\n",
    "    r'C:\\Users\\df5nlp.xlsx',\n",
    "    sheet_name='Sheet1',\n",
    "    index=False,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
